#!/usr/bin/env perl
# See copyright, etc in below POD section.
######################################################################

use warnings;
use Getopt::Long;
use IO::File;
use Pod::Usage;
use strict;
use vars qw($Debug $VERSION);

$VERSION = '3.404';

# State shared between the subroutines below: bison_version, filename, rules
our $Self;

#======================================================================
# main

# Values of the command line options, filled in by GetOptions() below
our $Opt_Debug;
our $Opt_Definitions;
our $Opt_File_Prefix;
our $Opt_Name_Prefix;
our $Opt_Output;
our $Opt_Token_Table;
our $Opt_Verbose;
our $Opt_Yacc = "bison";
our $Opt_Input;

STDOUT->autoflush(1);
STDERR->autoflush(1);
Getopt::Long::Configure("no_auto_abbrev");

my $options_ok = GetOptions(
    # Local options
    "help"              => \&usage,
    "version"           => sub { print "Version $VERSION\n"; exit(0); },
    "yacc=s"            => \$Opt_Yacc,
    # Passed to Bison
    "t|debug"           => sub { $Opt_Debug = 1; },
    "b|file-prefix=s"   => \$Opt_File_Prefix,
    "d"                 => \$Opt_Definitions,
    "k|token-table"     => \$Opt_Token_Table,
    "o=s"               => \$Opt_Output,
    "p|name-prefix=s"   => \$Opt_Name_Prefix,
    "v|verbose"         => \$Opt_Verbose,
    # Anything that is not an option is the input grammar
    "<>"                => \&parameter,
);
$options_ok or die "%Error: Bad usage, try 'bisonpre --help'\n";

# Both the grammar and the name of the generated parser are mandatory
die "bisonpre: %Error: input file not specified\n" unless $Opt_Input;
die "bisonpre: %Error: --o option is required\n" unless $Opt_Output;

process();

#----------------------------------------------------------------------

# Handler for --help: print the program version followed by the complete
# POD manual on STDOUT; pod2usage() then exits with status 0.
sub usage {
    print "Version $VERSION\n";
    my %pod_args = (
        -verbose   => 2,
        -exitval   => 0,
        -output    => \*STDOUT,
        -noperldoc => 1,
    );
    pod2usage(\%pod_args);
    exit(1);  # Unreachable
}

# Handler for non-option command line arguments.  The first one seen is
# stored in $Opt_Input as the grammar to read; any further one is an error.
sub parameter {
    my ($param) = @_;
    die "bisonpre: %Error: Unknown parameter: $param\n" if defined $Opt_Input;
    $Opt_Input = $param;
}

#######################################################################

# Run the whole pipeline: preprocess the grammar, run bison on the
# preprocessed copy, then post-process every file bison produced.
#
# Reads the $Opt_* option globals and stores the detected bison version in
# $Self->{bison_version}.  Dies if bison cannot be run or fails, or if
# warning_check() finds a problem in the verbose report.
sub process {
    remove_outputs();

    $Self->{bison_version} = bison_version_check();
    my $supports_report = ($Self->{bison_version} >= 2.3);

    my $tmp = tmp_prefix();
    my $out = output_prefix();

    clean_input($Opt_Input, "$tmp.y");

    # Build the bison command line
    my @args;
    push @args, "-t" if $Opt_Debug;
    push @args, "-d" if $Opt_Definitions;
    push @args, "-k" if $Opt_Token_Table;
    push @args, "-v" if $Opt_Verbose;
    push @args, "--report=itemset", "--report=lookahead"
        if $Opt_Verbose && $supports_report;
    # -p required for GLR parsers; they write to -p basename, not -o
    push @args, "-p", $Opt_Name_Prefix if $Opt_Name_Prefix;
    push @args, "-b", $tmp, "-o", "$tmp.c", "$tmp.y";

    # Run bison.  The command is deliberately passed as one string, so that
    # a --yacc value carrying its own arguments keeps working.
    my $command = join(" ", $Opt_Yacc, @args);
    print "  $command\n";
    system $command;
    my $status = $?;
    if ($status != 0) {
        remove_outputs();
        my $v = bison_version_check();
        die "bisonpre: %Error: $Opt_Yacc version $v run failed due to errors\n";
    }

    # The report is only written when bison was asked for it.  Without -v a
    # missing report is not an error; with -v clean_output() still dies if
    # the file was not produced.
    if ($Opt_Verbose || -e "$tmp.output") {
        clean_output("$tmp.output", "$out.output", 1,0);
        warning_check("$out.output");
    }

    clean_output("$tmp.c", "$out.c", 0,1);
    # Likewise the header is only written when requested (-d)
    if ($Opt_Definitions || -e "$tmp.h") {
        clean_output("$tmp.h", "$out.h", 0,1);
    }
    remove_tmp();
}

# Return the path prefix (no extension) of the temporary files handed to
# and written by bison: the output prefix with "_pretmp" appended.
sub tmp_prefix {
    my $prefix = output_prefix();
    return "${prefix}_pretmp";
}

# Return the path prefix (no extension) shared by all final output files.
#
# With -o this is the -o value minus the extension of its file name;
# otherwise it is the -b file prefix followed by ".tab".
sub output_prefix {
    if ($Opt_Output) {
        # Strip only an extension of the file name itself.  Excluding '/'
        # keeps a '.' in a directory component (e.g. "../obj/parser") from
        # being mistaken for the start of the extension.
        (my $prefix = $Opt_Output) =~ s!\.[^./]*$!!;
        return $prefix;
    }
    return $Opt_File_Prefix.".tab";
}

# Delete the temporary files written by bison.  Failures are ignored, as
# the files may not exist.
sub remove_tmp {
    foreach my $ext (".c", ".h", ".output") {
        unlink(tmp_prefix().$ext);  # Ok if errors
    }
}

# Delete the temporary files plus the final .c and .h outputs, so that a
# failed run cannot leave stale results behind.  Failures are ignored.
sub remove_outputs {
    remove_tmp();
    my $prefix = output_prefix();
    # We don't remove .output file, as it's useful for debugging errors
    unlink("$prefix.c", "$prefix.h");  # Ok if errors
}

# Run "$Opt_Yacc --version" and return the first MAJOR.MINOR number found in
# its output.  Dies if no version can be found, or if it is older than 1.875.
sub bison_version_check {
    my $banner = `$Opt_Yacc --version`;

    my $v;
    if ($banner && $banner =~ /([0-9]+\.[0-9]+)/) {
        $v = $1;
    }
    defined $v
        or die "bisonpre: %Error: '$Opt_Yacc' is not installed, or not working\n";

    # Note the version is compared as a plain number
    ($v >= 1.875) or die "bisonpre: %Error: '$Opt_Yacc' is version $v; version 1.875 or newer is required\n";
    return $v;
}

# Post-process one file written by bison and store the result in $outname.
#
# Arguments:
#   $filename  - file to read
#   $outname   - file to write; defaults to $filename
#   $is_output - true for the verbose report: state headers get a trailing
#                colon, and "State N conflicts" lines get a pointer to the
#                line of that state's header
#   $is_c      - true for C sources/headers: each BISONPRE_TOKEN_NAMES line
#                is followed by one 'case N: return "NAME";' per token
#
# In addition every line has the temporary base name replaced by the base
# name of the input grammar, and a few compiler-warning workarounds applied.
# Dies if a file cannot be opened, or if writing fails.
sub clean_output {
    my $filename = shift;
    my $outname = shift || $filename;
    my $is_output = shift;
    my $is_c = shift;
    print "  edit $filename $outname\n";

    my $fh = IO::File->new($filename, "<") or die "%Error: $! $filename\n";
    my @lines = $fh->getlines;
    $fh->close;

    # Pattern matching references to the temporary files, without directory
    (my $basename = tmp_prefix().".") =~ s!.*/!!;
    $basename = quotemeta($basename);
    # Replacement: base name of the grammar, e.g. "foo." for "dir/foo.y".
    # Only the final extension is dropped; a ".y" elsewhere in the name
    # must be left alone.
    (my $newbase = $Opt_Input) =~ s!.*/!!;
    $newbase =~ s/\.[^.]*$/./;

    if ($is_output) {
        my %state_line; my $l=0;
        foreach my $line (@lines) {
            $l++;
            # We add a colon so it's easy to search for the definition.
            # Only blanks are consumed, so the line keeps its newline and
            # the line numbers recorded here match the file we write.
            $state_line{$1} = $l if $line =~ s/^state (\d+)[ \t\r]*$/state $1:/;
        }
        my @out;
        foreach my $line (@lines) {
            if ($line =~ /^State (\d+) (conflicts)/) {
                chomp $line;
                $line .= " // line $state_line{$1}" if $state_line{$1};
                $line .= "\n";
            }
            push @out, $line;
        }
        @lines = @out;
    }
    if ($is_c) {
        # Collect "NAME = NUMBER" from the yytokentype enum: {number} = name
        my %token_values;
        my $in_en=0;
        foreach my $line (@lines) {
            $in_en=1 if $line =~ /enum\s+yytokentype/;
            $in_en=0 if $line =~ /;/;
            $token_values{$2} = $1 if $in_en && $line =~ /\b(\S+) = (\d+)/;
        }
        my @out;
        foreach my $line (@lines) {
            push @out, $line;
            if (_enaline($line) && $line =~ /BISONPRE_TOKEN_NAMES/) {
                # The keys are numbers, so sort them numerically
                foreach my $tv (sort { $a <=> $b } keys %token_values) {
                    push @out, sprintf("\tcase %d: return \"%s\";\n",
                                       $tv, $token_values{$tv});
                }
            }
        }
        @lines = @out;
    }

    $fh = IO::File->new($outname, ">") or die "%Error: $! writing $outname\n";
    foreach my $line (@lines) {
        # Fix filename refs
        $line =~ s!$basename!$newbase!g;
        # Fix bison 2.3 and GCC 4.2.1
        $line =~ s!\(YY_\("!(YY_((char*)"!g;
        # Fix bison 2.3 glr-parser warning about yyerrorloc.YYTYPE::yydummy uninit
        $line =~ s!(YYLTYPE yyerrloc;)!$1 yyerrloc.yydummy=0;/*bisonpre*/!g;
        # Fix bison 3.6.1 unexpected nested-comment
        $line =~ s!/\* "/\*.*\*/"  \*/!!g;
        $fh->write($line);
    }
    # Buffered write errors (e.g. disk full) only show up at close
    $fh->close or die "%Error: $! writing $outname\n";
}

# Scan the report $filename and die, quoting the file name, line number and
# text, at the first line that mentions conflicts, contains "warning:" or
# starts with "useless" (all matched case-insensitively).
# Also dies if the file cannot be opened.
sub warning_check {
    my ($filename) = @_;

    my $fh = IO::File->new("<$filename") or die "%Error: $! $filename\n";
    while (1) {
        my $line = $fh->getline;
        last unless defined $line;
        next unless $line =~ /(conflicts|warning:|^useless)/i;
        die "%Error: $filename:$.: $line\n";
    }
    $fh->close;
}

#######################################################################

# Preprocess the grammar $filename into $outname (defaults to $filename),
# expanding the bisonpre extensions so that plain bison can read the result.
#
# All lines are read, then rewritten by successive passes in this order:
#   1. Collect rules, rule types and typed %token declarations, and
#      rewrite "rule<type>:" to "rule:"
#   2. Expand BISONPRE_VERSION
#   3. Expand BISONPRE_NOT
#   4. Expand BISONPRE_COPY / BISONPRE_COPY_ONCE
#   5. Remove ~name~ markers (must follow the BISONPRE_COPY expansion)
#   6. Insert the %type declarations after //BISONPRE_TYPES
# Passes 2-4 and 6 ignore lines that _enaline() reports as disabled.
#
# Side effects: sets $Self->{filename} and $Self->{rules}, and reads
# $Self->{bison_version}.  Dies with a "%Error: file:line:" message on
# malformed input, or when a file cannot be opened or written.
sub clean_input {
    my $filename = shift;
    my $outname = shift || $filename;  # Can == filename if desired
    print "  edit $filename $outname\n";

    $Self->{filename} = $filename;
    my $fh = IO::File->new($filename, "<") or die "%Error: $! $filename\n";
    my @lines = $fh->getlines;
    $fh->close;

    # Find "%tokens<type>:"
    # Find "rule<type>:" and replace with just "rule:"
    my %types;       # {type}{rule} = 1
    my %rules;  $Self->{rules} = \%rules;
    my %tokens;      # {token} = type
    my $last_rule;   # Rule being defined, undef when between rules
    my $section = 1; # Incremented at every %% line
    {
        my @linesin = @lines;  @lines=();  my $lineno=0;
        foreach my $line (@linesin) {
            $lineno++;
            #  ^/ to prevent comments from matching
            $line =~ m!^[a-zA-Z0-9_<>]+:[^/]*[a-zA-Z]! and die "%Error: $filename:$lineno: Move text on rule line to next line: $line\n";
            if ($line =~ /^%%/) {
                $section++;
                if ($section==2) { $last_rule = undef; }
            }
            elsif ($line =~ s/^([a-zA-Z0-9_]+)<(\S*)>:/$1:/) {
                !$rules{$1}{name} or die "%Error: $filename:$lineno: Redeclaring '$1': $line\n";
                $types{$2}{$1} = 1;
                $rules{$1}{name} = $1;
                $rules{$1}{type} = $2;
                !$last_rule or die "%Error: $filename:$lineno: Unterminated previous rule\n";
                $last_rule = $1;
            } elsif ($line =~ /^([a-zA-Z0-9_]+):/) {
                !$rules{$1}{name} or die "%Error: $filename:$lineno: Redeclaring '$1': $line\n";
                $rules{$1}{name} = $1;
                $rules{$1}{type} = "";
                !$last_rule or die "%Error: $filename:$lineno: Unterminated previous rule\n";
                $last_rule = $1;
            }
            push @lines, $line;
            # Now clean the line and extract some more info
            (my $cline = $line) =~ s/\/\/.*$/\n/;
            if ($cline =~ /^\s*;/) {
                $last_rule or die "%Error: $filename:$lineno: Stray semicolon\n";
                $last_rule = undef;
            } elsif ($last_rule) {
                $rules{$last_rule}{rules_and_productions} .= $cline;
            }
            if ($cline =~ /^%token\s*<(\S+)>\s*(\S+)/) {
                !$tokens{$2} or die "%Error: $filename:$lineno: Redeclaring '$2': $line\n";
                $tokens{$2} = $1;
            }
            # Record which identifiers each rule mentions, in both directions
            foreach my $tok (split /[^a-zA-Z0-9_]+/, $cline) {
                if ($last_rule && $tok=~/^[a-zA-Z]/) {
                    #print "TT $last_rule $tok\n";
                    $rules{$last_rule}{subrules}{$tok} = 1;
                    $rules{$tok}{parentrules}{$last_rule} = 1;
                }
            }
        }
    }

    #use Data::Dumper; print Dumper(\%rules);

    # Replace BISONPRE_VERSION(ver,,...) with expanded list
    {
        my @linesin = @lines;  @lines=();  my $lineno=0;
        foreach my $line (@linesin) {
            $lineno++;
            if (_enaline($line) && $line =~ /BISONPRE_VERSION/) {
                #                            1         3             4
                ($line =~ /BISONPRE_VERSION\((\S+)\s*,\s*((\S+)\s*,)?\s*([^\),]+)\)\s*$/)
                    or die "%Error: $filename:$lineno: Bad form of BISONPRE_VERSION: $line\n";
                my $ver=$1;  my $ver_max=$3;  my $cmd=$4;
                if ($Self->{bison_version} >= $ver
                    && (!$ver_max || $Self->{bison_version} <= $ver_max)) {
                    $line = $cmd."\n";
                } else {
                    # Keep the line, commented out, so line numbers are kept
                    $line = "//NOP: $line";
                }
            }
            push @lines, $line;
        }
    }

    # Replace BISONPRE_NOT(type,...) with expanded list
    {
        my @linesin = @lines;  @lines=();  my $lineno=0;
        foreach my $line (@linesin) {
            $lineno++;
            if (_enaline($line) && $line =~ /BISONPRE_NOT/) {
                ($line =~ s/BISONPRE_NOT\((\S+)\)\s*(\{[^}]+})\s*$//)
                    or die "%Error: $filename:$lineno: Bad form of BISONPRE_NOT: $line\n";
                my $endtok = $1;  my $action = $2;
                my @endtoks = split(/,/, $endtok);
                foreach my $tok (@endtoks) {
                    $tokens{$tok} or die "%Error: $filename:$lineno: Can't find definition for token: $tok\n";
                }
                my %is_endtok = map { $_ => 1 } @endtoks;
                # Push it all onto one line to avoid error messages changing
                my $bar = "";
                foreach my $tok (sort keys %tokens) {
                    next if $is_endtok{$tok};
                    $line .= "\t$bar $tok $action";
                    $bar = "|";
                }
                $line .= "\n";
            }
            push @lines, $line;
        }
    }

    # Replace BISONPRE_COPY(type,{code})
    {
        my @linesin = @lines;  @lines=();  my $lineno=0;
        foreach my $line (@linesin) {
            $lineno++;
            if (_enaline($line) && $line =~ /BISONPRE_COPY/) {
                $line = _bisonpre_copy($line,$lineno,0);
            }
            push @lines, $line;
        }
    }

    # Replace ~[x]~  - must be after BISONPRE_COPY expansion
    {
        my @linesin = @lines;  @lines=();
        foreach my $line (@linesin) {
            $line =~ s/~[a-zA-Z0-9_]+~//g;
            push @lines, $line;
        }
    }

    # Find "BISONPRE_TYPES"
    {
        my @linesin = @lines;  @lines=();  my $lineno=0;
        my $needmore = 0;  # Inserted lines still to be paid for by dropping blank ones
        foreach my $line (@linesin) {
            $lineno++;
            if (_enaline($line) && $line =~ m!//BISONPRE_TYPES!) {
                push @lines, $line;
                foreach my $type (sort keys %types) {
                    next if !$type;
                    my $decl = "%type<$type>\t";
                    foreach my $rule (sort keys %{$types{$type}}) {
                        $decl .= " ".$rule;
                    }
                    $decl .= "\n";
                    push @lines, $decl;
                    $needmore++;
                }
            } elsif ($needmore) {
                # Bison doesn't have a #line directive, so we need somewhere to insert into
                $line =~ s!^\s*//.*$!!;
                ($line =~ m/^\s*$/)
                    or die "%Error: $filename:$lineno: Need $needmore more blank lines to keep line numbers are constant\n";
                # This blank (or comment-only) line is dropped, not copied
                $needmore--;
            } else {
                push @lines, $line;
            }
        }
    }

    $fh = IO::File->new($outname, ">") or die "%Error: $! writing $outname\n";
    foreach my $line (@lines) {
        $fh->write($line);
    }
    # Buffered write errors (e.g. disk full) only show up at close
    $fh->close or die "%Error: $! writing $outname\n";
}

# Expand every BISONPRE_COPY(rule,{code}) and BISONPRE_COPY_ONCE(rule,{code})
# found in $text, and return the expanded text.
#
# Arguments:
#   $text  - grammar line containing BISONPRE_COPY
#   $l     - line number, used in error messages
#   $depth - number of expansions already made; when greater than zero a
#            BISONPRE_COPY_ONCE is removed instead of being expanded
#
# The productions collected for the rule in $Self->{rules} are filtered
# through the Perl {code}, which sees the text in $_, and are then joined
# onto a single line.  The loop repeats, so a BISONPRE_COPY contained in the
# inserted productions is expanded as well.
# Dies on a malformed macro, on an unknown rule, or if {code} fails.
#
# NOTE: {code} is run with a string eval inside this sub, so it can see the
# lexical variables declared here; their names are kept unchanged for that
# reason.
sub _bisonpre_copy {
    my $text = shift;
    my $l  = shift;
    my $depth = shift;
    while ($text =~ /BISONPRE_COPY/) {
        ($text =~ s/BISONPRE_COPY(_ONCE)?\((\S+)\s*,\s*\{([^}]*)}\s*\)/{HERE}/)
            or die "%Error: $Self->{filename}:$l: Bad form of BISONPRE_COPY: $text\n";
        my $once = $1; my $rule = $2;  my $code = $3;
        $Self->{rules}{$rule} or die "%Error: $Self->{filename}:$l: Can't find definition for rule: $rule\n";
        if ($depth > 0 && $once) {
            # _ONCE means don't inherit
            $text =~ s/\|[ \t]+\{HERE\}//;        # Don't OR in nothing
            $text =~ s/\{HERE\}//;
        } else {
            # Push it all onto one line to avoid error messages changing
            my $insert = $Self->{rules}{$rule}{rules_and_productions};
            # A name that is only mentioned inside other rules has an entry
            # in the rules table too, but nothing that could be copied
            defined $insert
                or die "%Error: $Self->{filename}:$l: Can't find definition for rule: $rule\n";
            $insert =~ s/^\S+://g;      # Strip rule name
            # Recurse so BISONPRE under B
            #print "COPY $l code $code\n";
            #print "COPY $l in   $insert\n";
            {
                # The code comes from the grammar file being processed.
                # Keep our use of $_ from leaking to the callers, and
                # report broken code instead of silently copying the text
                # unfiltered.
                local $_ = $insert;
                eval("$code; \$_;");
                die "%Error: $Self->{filename}:$l: Error in BISONPRE_COPY code: $@\n" if $@;
                $insert = $_;
            }
            #print "COPY $l out  $insert\n";
            while ($insert =~ s/[ \t\n]+\n/\n/go) {}
            while ($insert =~ s/\n/ /go) {}   # Optional - preserve line numbering
            $text =~ s/\{HERE\}/$insert/;
        }
        $depth++;
    }
    return $text;
}

# Return 1 if the BISONPRE macros on $line should be processed, or 0 if
# the line contains "//UN", which disables them.
sub _enaline {
    my ($line) = @_;
    return ($line =~ m!//UN!) ? 0 : 1;
}

#######################################################################
__END__

=pod

=head1 NAME

bisonpre - Bison wrapper with pre and post processing

=head1 SYNOPSIS

  bisonpre --yacc bison --debug --verbose -d -k $< -p X -o X.c


=head1 DESCRIPTION

Bisonpre is a wrapper for the Bison YACC replacement.  Input to Bison is
preprocessed with substitution as described below under EXTENSIONS.  Output
from Bison is checked for additional errors, and corrected to work around
various compile warnings.

=head1 EXTENSIONS

=over 4

=item //BISONPRE_TYPES

This is expanded into %type declarations.

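=item ~[a-zA-Z0-9_]+~

Any text matching ~[a-zA-Z0-9_]+~ is removed, after all BISONPRE_COPY
expansion is complete.  This allows optional text to be used only when the
rule containing the ~~ is used in a BISONPRE_COPY, whose code can replace
the marker before it is removed.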

=item rule_label<type>:

This allows the label declaring a rule to also specify the type of the
rule.  The type will be inserted where //BISONPRE_TYPES is encountered.

=item BISONPRE_COPY(rule, {code})

Copy the rules and productions from the specified rule, filter through the
Perl code provided in the {} and insert here into the output file.

=item BISONPRE_COPY_ONCE(rule, {code})

As with BISONPRE_COPY, but if called from underneath another BISONPRE_COPY
rule, ignore it.

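=item BISONPRE_NOT(token[,token...]) {action}

Create alternatives matching every token declared with %token<type> except
for those specified, each followed by the given action.  The token list
must not contain whitespace.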

=item BISONPRE_VERSION(ver, [ver_max,] cmd)

If the bison version is >= ver, and <= ver_max when that is given, include
the given command; otherwise the line is commented out.

=back

=head1 ARGUMENTS

=over 4

=item -b file-prefix

=item --file-prefix=file-prefix

Passed to bison.

Specify a prefix to use for all bison output file names.  The names are
chosen as if the input file were named file-prefix.c.

=item -d

Passed to bison.

Write an extra output file containing macro definitions for the token type
names defined in the grammar and the semantic value type YYSTYPE, as well
as a few extern variable declarations.  If the parser output file is named
name.c then this file is named name.h.  This output file is essential if
you wish to put the definition of yylex in a separate source file, because
yylex needs to be able to refer to token type codes and the variable
yylval.

=item --help

Displays this message and program version and exits.

=item -k

=item --token-table

Passed to bison.

This switch causes the name.tab.c output to include a list of token names
in order by their token numbers; this is defined in the array yytname.
Also generated are #defines for YYNTOKENS, YYNNTS, YYNRULES, and YYNSTATES.

=item -t

=item --debug

Passed to bison.

In the parser file, define the macro YYDEBUG to 1 if it is not already
defined, so that the debugging facilities are compiled.

=item -v

=item --verbose

Passed to bison.

Write an extra output file containing verbose descriptions of the parser
states and what is done for each type of look-ahead token in that state.
This file also describes all the conflicts, both those resolved by operator
precedence and the unresolved ones.  The file's name is made by removing
.tab.c or .c from the parser output file name, and adding .output instead.
Therefore, if the input file is foo.y, then the parser file is called
foo.tab.c by default.  As a consequence, the verbose output file is called
foo.output.

=item --version

Print the version number and exit.

=item --yacc

Specify the name of the bison executable, defaults to "bison".

=back

=head1 DISTRIBUTION

This is part of the L<https://www.veripool.org/> free Verilog EDA software
tool suite.  The latest version is available from CPAN and from
L<https://www.veripool.org/>.

Copyright 2008-2020 by Wilson Snyder. This program is free software; you
can redistribute it and/or modify it under the terms of either the GNU
Lesser General Public License Version 3 or the Perl Artistic License Version 2.0.

SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0

=head1 AUTHORS

Wilson Snyder <wsnyder@wsnyder.org>

=head1 SEE ALSO

C<bison>

=cut

######################################################################
### Local Variables:
### compile-command: "./bisonpre "
### End:
